#pragma once
#ifndef uintX2
#define uintX2

#include <iostream>
#include <time.h>
#include <stdlib.h>
#include <math.h>

//#define BYTEPERT sizeof(T)*8
typedef unsigned __int16 usize_t;
typedef unsigned __int64 uint64;

/** /
//debug
static int count1(0),count2(0);
/**/

#define LTR
#define KARATSUBA
//#define BARRET
//#define BITRANDOM

//class that contains two instance of unsigned integer number of specified types
template <typename T = unsigned __int64> class __uintX2
{
private:
	static const usize_t BYTEPERT=sizeof(T)*8;
public:
	//constructors
	__uintX2<T>(const T h,const T l):
	  _h(h),_l(l){}
	__uintX2<T>(const T l=T(0)):
	_h(T(0)), _l(l){}
	//__uintX2<T>(const __uintX2<T>& src):_h(src._h),_l(src._l){}
	//__uintX2<T>(const usize_t longpersize, const uint64* const srcbuf){}
	//comparison operators
	bool operator >(const __uintX2<T>& rhs) const{return cmp(rhs) >0;}
	bool operator>=(const __uintX2<T>& rhs) const{return cmp(rhs)>=0;}
	bool operator <(const __uintX2<T>& rhs) const{return cmp(rhs) <0;}
	bool operator<=(const __uintX2<T>& rhs) const{return cmp(rhs)<=0;}
	bool operator==(const __uintX2<T>& rhs) const{return cmp(rhs)==0;}
	bool operator!=(const __uintX2<T>& rhs) const{return cmp(rhs)!=0;}
	
	//bitwise operators
	inline __uintX2<T> operator& (const __uintX2<T>& rhs) const
	{
		return __uintX2<T>(_h&rhs._h,_l&rhs._l);
	}
	inline __uintX2<T>& operator&=(const __uintX2<T>& rhs)
	{
		_h&=rhs._h;_l&=rhs._l;
		return *this;
	}
	inline __uintX2<T> operator| (const __uintX2<T>& rhs) const
	{
		return __uintX2<T>(_h|rhs._h,_l|rhs._l);
	}
	inline __uintX2<T>& operator|=(const __uintX2<T>& rhs)
	{
		_h|=rhs._h;_l|=rhs._l;return *this;
	}
	inline __uintX2<T> operator^ (const __uintX2<T>& rhs) const
	{
		return __uintX2<T>(_h^rhs._h,_l^rhs._l);
	}
	inline __uintX2<T>& operator^=(const __uintX2<T>& rhs)
	{
		_h^=rhs._h;_l^=rhs._l;return *this;
	}
	inline __uintX2<T> operator~() const
	{
		return __uintX2<T>(~_h,~_l);
	}

	//shift operators
	inline __uintX2<T> operator<< (const usize_t shv) const
	{
		return __uintX2<T>(*this)<<=shv;
	}
	__uintX2<T>& operator<<=(const usize_t shv)
	{
		if(shv==0)
			return *this;
		//const usize_t BYTEPERT(sizeof(T)*8);
		if((shv>2*BYTEPERT) || (shv < 0))
		{
			throw "Shift value must be positive value less than size";
		}
		if(shv==2*BYTEPERT)
		{
			_l&=T(0);
			_h&=T(0);
		}
		else if(shv>BYTEPERT)
		{
			_h=(_l<<=(shv-BYTEPERT));
			_l&=T(0);
		}
		else if(shv==BYTEPERT)
		{
			_h=_l;
			_l&=T(0);
		}
		else
		{
			(_h<<=shv)|=(_l>>(BYTEPERT-shv));
			_l<<=shv;
		}
		return *this;
	}
	inline __uintX2<T> operator>> (const usize_t shv) const
	{
		return __uintX2<T>(*this)>>=shv;
	}
	__uintX2<T>& operator>>=(const usize_t shv)
	{
		if(shv==0)
			return *this;
		//const usize_t BYTEPERT(sizeof(T)*8);
		if((shv>2*BYTEPERT) || (shv < 0))
		{
			throw "Shift value must be positive value less than size";
		}
		if(shv==2*BYTEPERT)
		{
			_l&=T(0);
			_h&=T(0);
		}
		else if(shv>BYTEPERT)
		{
			_l=(_h>>=(shv-BYTEPERT));
			_h&=T(0);
		}
		else if(shv==BYTEPERT)
		{
			_l=_h;
			_h&=T(0);
		}
		else
		{
			(_l>>=shv)|=(_h<<(BYTEPERT-shv));
			_h>>=shv;
		}
		return *this;
	}

	//arithmetic operators
	//__uintX2<T> operator-(){return 0-(*this);}
	inline __uintX2<T>& operator++()
	{
		return (*this+=one());
	}
	__uintX2<T> operator++(int)
	{
		__uintX2<T> t=*this; ++*this; return t;
	}
	inline __uintX2<T>& operator--()
	{
		return ((*this)-=one());
	}
	__uintX2<T> operator--(int)
	{
		__uintX2<T> t=*this; --*this; return t;
	}
	inline __uintX2<T> operator+ (const __uintX2<T>& rhs) const
	{
		return __uintX2<T>(*this)+=rhs;
	}
	__uintX2<T>& operator+=(const __uintX2<T>& rhs)
	{
		bool c = false;
		if(addT(_l,rhs._l)) c = addT(_h,T(1));
		c |= addT(_h,rhs._h);
		return *this;
	}
	inline __uintX2<T> operator- (const __uintX2<T>& rhs) const
	{
		return __uintX2<T>(*this)-=rhs;
	}

#define CHECK_SUBSTRACTION
	__uintX2<T>& operator-=(const __uintX2<T>& rhs)
	{
		
		bool c = false;
		if(subT(_l,rhs._l)) c = subT(_h,T(1));
		c |= subT(_h,rhs._h);
		return *this;
	}

	inline __uintX2<T> operator* (const __uintX2<T>& rhs) const
	{
		return __uintX2<T>(*this)*=rhs;
	}
	__uintX2<T>& operator*=(const __uintX2<T>& rhs)
	{
		//const usize_t BYTEPERT(sizeof(T)*8);
		__uintX2<T> B = multT(_l,rhs._l);
		__uintX2<T> C = multT(_h,rhs._l);
		C += multT(_l,rhs._h);
		return (*this = B+(C<<BYTEPERT));
	}
	__uintX2<T> operator% (const __uintX2<T>& mod) const
	{
		return __uintX2<T>(*this)%=mod;
	}
	__uintX2<T>& operator%=(const __uintX2<T>& mod)
	{
		if(mod.is_zero()) throw "Division by zero";
		if(mod==one()) 
		{
			this->_h = T(0);
			this->_l = T(0);
			return *this;
		}
		const usize_t k=mod.size();
#ifdef BARRET
		while(size()>2*k)
#else /* BARRET */
		while(*this>=mod)
#endif /* BARRET */
		{
		   __uintX2<T> quot = __uintX2<T>(mod)<<(size()-k);
		   if(quot>*this) quot>>=1;
		   *this-=quot;
		}
#ifdef BARRET
		__uintX2<T> z=one(); z<<=2*k;z/=mod;
		__uintX2<T> d=one(); d<<=(k+1);
		unsigned char s=0;
		if(bit(k-1)) s=1;
		__uintX2<T> b=*this>>(k-1)+s;
#endif /* BARRET */
		return *this;
	}	
	__uintX2<T> operator/ (const __uintX2<T>& rhs) const
	{
		return __uintX2<T>(*this)/=rhs;
	}
	__uintX2<T>& operator/=(const __uintX2<T>& rhs)
	{
		if(rhs.is_zero()) throw "Division by zero";
		if(rhs==one()) return *this;
		__uintX2<T> rem = *this, res;
		usize_t div_size = rhs.size();
		while(rem>=rhs)
		{
			usize_t sh = rem.size() - div_size;
		   __uintX2<T> quot = __uintX2<T>(rhs)<<(sh);
		   if(quot>rem) 
		   {
			   quot>>=1;
			   --sh;
		   }
		   rem-=quot;
		   res += one()<<sh;
		}
		*this=res;
		return *this;
	}	
	static __uintX2<T> add_m(const __uintX2<T>& lhs, const __uintX2<T>& rhs, const __uintX2<T>& mod=_max())
	{
		__uintX2<__uintX2<T> > n(lhs);
		bool c = false;
		if(__uintX2<T>::addT(n._l._l,rhs._l)) c = __uintX2<T>::addT(n._l._h,T(1));
		c |= __uintX2<T>::addT(n._l._h,rhs._h);
		if(c) n._h = T(1);
		
		n%=mod;
		return n._l;
	}
	static __uintX2<T> sub_m(const __uintX2<T>& lhs, const __uintX2<T>& rhs, const __uintX2<T>& mod=_max())
	{
		if(lhs < rhs)
			return mod - (rhs - lhs) % mod;
		return (lhs - rhs) % mod;
	}

	static __uintX2<T> mult_m(const __uintX2<T>& lhs, const __uintX2<T>& rhs, const __uintX2<T>& mod=_max())
	{
		__uintX2<__uintX2<T> > n=__uintX2<T>::mult_uint(lhs,rhs);
		n%=mod;
		return n._l;
	}
	static __uintX2<T> sqr_m(const __uintX2<T>& n, const __uintX2<T>& mod=_max())
	{
		__uintX2<__uintX2<T> > n_l=__uintX2<T>::sqr_uint(n);
		n_l%=mod;
		return n_l._l;
	}
	static __uintX2<T> pow_m(const __uintX2<T>& x, const __uintX2<T>& y, const __uintX2<T>& mod=_max())
	{
		usize_t n=y.size();
		#ifndef LTR
		__uintX2<T> z=one(),q(x);
		if((y&one())==one()) z=x;

		for(usize_t i=1;i<n;++i)
		{
			q=sqr_m(q,mod);
			if(y.bit(i)) z=mult_m(z,q,mod);
		}
		#else /* LTR */
		__uintX2<T> z(x);
		for(int i=n-2;i>=0;--i)
		{
			z=sqr_m(z,mod);
			//curr<<=1;
			if(y.bit(i)) 
			{
				z=mult_m(z,x,mod);
			}
		}
		#endif /* LTR */
		return z;
	}
	__uintX2<T> inv(const __uintX2<T>& mod) const
	{
		if ((this->gcd(mod)!=one())||(mod==one())) return zero();
		/** /
		__uintX2<T> q, r, a = (*this) % mod, b = mod;
		__uintX2<T>  x = zero(),  x1 = zero(),  x2 = one();

		
		  while (b > 0) {

			q = a / b, r = a % b;

			x = sub_m(x2, q *  x1, mod); 

			a = b, b = r;

			x2 = x1, x1 = x;
		  }

		  return x2;
		 /**/
		  __uintX2<T> q, r, d = (*this) % mod, c = mod;
		__uintX2<T>  a = one(), a1 = zero();
		//, b1 = one(), b = zero();

		while(true)
		{
			q = c / d, r = c % d;
			
			if(r.is_zero()) return a;

			c = d, d = r;

			__uintX2<T> t;
			t = a1, a1 = a;
			
			a =  sub_m(t, q * a, mod); 
			//a = (t < q * a) ? sub_m(q * a, t, mod) : sub_m(t, q * a, mod); 
			//t = b1, b1 = b, b = t - q * b;
		}
		/**/
	}
	static __uintX2<T> random()
	{
		//srand((int)time(NULL));
		__uintX2<T> res(T(0));
#ifndef BITRANDOM
		usize_t rand_size = __uintX2<T>(T(RAND_MAX)).size();
		usize_t n = (usize_t)ceil((float)2*BYTEPERT/rand_size);
		for(usize_t i = 0; i < n; ++i)
		{
			res+=__uintX2<T>(T(rand()))<<rand_size*i;
		}
#else  /* BITRANDOM */
		for(usize_t i(BYTEPERT*2);i>=1;--i)
		{
			res|=__uintX2((T)(rand()%2))<<(i-1);
		}
#endif /* BITRANDOM */
		return res;
	}

	inline static __uintX2<T> gcd(const __uintX2<T>& n,const __uintX2<T>& m)
	{
		return n.gcd(m);
	}
	
	static signed __int8 jacobi_symbol(const __uintX2<T>& n,const __uintX2<T>& m)
	{
		return n.jacobi_symbol(m);
	}
	usize_t size() const
	{
		/** /
		if(is_zero()) return 0;

		usize_t span = BYTEPERT, s = BYTEPERT;
		while(span >= 1)
		{
			span >>= 1;
			switch (cmp(one() << s))
			{
			case 0: return ++s;
			case 1:	s += span; break;
			case -1: s -= span; break;
			}
		}
		return ++s;
		/**/
		/**/
		if(is_zero()) return 0;
		
		__uintX2<T> k(T(0));
		--k;

		usize_t size(BYTEPERT*2+1);
		for(;size>=1;--size)
		{
			if(cmp(k)>0) break;
			k>>=1;
		}
		return size;
		/**/
	}
	usize_t weight() const
	{
		if(is_zero()) return 0;
		usize_t weight=0;

		__uintX2<T> own(*this);
		for(usize_t i=0;i<BYTEPERT*2;++i)
		{
			if(!(own & one()).is_zero()) ++weight;
			own>>=1;
		}
		return weight;
	}
	
	//operator unsigned char(){return _l;}
	template <typename T> friend std::ostream& operator<<(std::ostream& stream, __uintX2<T>);
	T _h,_l;

protected:
	static bool addT(T& acc, const T b)
	{
		//const usize_t BYTEPERT(sizeof(T)*8);
		const T oneT_1 = (T(1)<<(BYTEPERT-1)),
		        zeroT_1 = (T(1)<<(BYTEPERT-1)) - T(1);

		bool c = false;
		/**/
		switch((acc>>(BYTEPERT-1)) + (b>>(BYTEPERT-1)))
		{
		case 0:
			acc += b;
			break;
		case 1:
			(acc &= zeroT_1)+=(b & zeroT_1);
			if((acc>>(BYTEPERT-1)) == T(0))
			{
				acc |= oneT_1;
			}
			else
			{
				acc &= zeroT_1;
				c = true;
			}
			break;
		case 2:
			c = true;
			(acc &= zeroT_1)+=(b & zeroT_1);
			break;
		}
		/** /
		T a_over = acc>>(BYTEPERT-1),
		  b_over = b>>(BYTEPERT-1);
		if(a_over == T(0))
		{
			if(b_over == T(0))
			{
				acc += b;
			}
			else
			{
				(acc &= zeroT_1)+=(b & zeroT_1);
				acc |= oneT_1;
			}
		}
		else
		{
			if(b_over == T(0))
			{
				(acc &= zeroT_1)+=(b & zeroT_1);
				acc &= zeroT_1;
				c = true;
			}
			else
			{
				c = true;
				(acc &= zeroT_1)+=(b & zeroT_1);
			}
		}
		/**/
		return c;
	}
	static bool subT(T& a, const T b)
	{
		bool c = false;
		if(b > a)
		{
			a = ~(b-a)+T(1);
			c = true;
		}
		else a-=b;
		return c;
	}
	static __uintX2<T> multT(const T a, const T b) 
	{
		//const usize_t HALFT(sizeof(T)*4);
		const usize_t  HALFT(BYTEPERT/2);
		const T half = (T(1)<<HALFT) - T(1);
		T A = ((a>>HALFT)*(b>>HALFT));
		T B = ((a&half)*(b&half));
		__uintX2<T> C((a>>HALFT)*(b&half));
		C += __uintX2<T>((a&half)*(b>>HALFT));
		return __uintX2<T>(A,B)+=(C<<HALFT);
	}
	static __uintX2<T> sqrT(const T a)
	{
		//const usize_t HALFT(sizeof(T)*4);
		const usize_t HALFT(BYTEPERT/2);
		const T half = (T(1)<<HALFT) - T(1);
		T A = a>>HALFT;
		T B = a&half;
		__uintX2<T> C=A*B;A*=A;B*=B;
		return __uintX2<T>(A,B)+=(C<<(HALFT+1));
	}
	static __uintX2<__uintX2<T> > mult_uint(const __uintX2<T>& a, const __uintX2<T>& b)
	{
#ifndef KARATSUBA
		__uintX2<T> A = __uintX2<T>::multT(a._h,b._h);
		__uintX2<T> B = __uintX2<T>::multT(a._l,b._l);
		__uintX2<T> C = __uintX2<T>::multT(a._h,b._l);
		__uintX2<T> D = __uintX2<T>::multT(a._l,b._h);

		bool c = false;
		if(__uintX2<T>::addT(C._l,D._l)) c = __uintX2<T>::addT(C._h,T(1));
		c |= __uintX2<T>::addT(C._h,D._h);
		if(c) ++A._h;

		
		if(__uintX2<T>::addT(B._h,C._l))
			++A;
		A+=__uintX2<T>(C._h);
		/*
		|----T----|----T----|----T----|----T----|
		|_________A_________|_________B_________|
		|        |__________C__________|        |
		|        c                              |
		|----T----|----T----|----T----|----T----|
		*/
		return __uintX2<__uintX2<T> >(A,B);
#else /* KARATSUBA */
		__uintX2<T> A = __uintX2<T>::multT(a._h,b._h);
		__uintX2<T> B = __uintX2<T>::multT(a._l,b._l);
		
		__uintX2<T> Ca(a),Cb(b);
		bool c1 = __uintX2<T>::addT(Ca._l,Ca._h);
		bool c2 = __uintX2<T>::addT(Cb._l,Cb._h);

		__uintX2<T> C=__uintX2<T>::multT(Ca._l,Cb._l); 

		T K(0);
		if(c1) 
			if(__uintX2<T>::addT(C._h,Cb._l))
				++K;
		if(c2) 
			if(__uintX2<T>::addT(C._h,Ca._l))
				++K;
		if(c1&&c2) ++K;
		/*
		|----K----|---------C---------|
		|----T----|----T----|----T----|
		|---------|----Ca._l*Cb._l----|
		+         |c1->Cb._l|---------|
		+         |c2->Ca._l|---------|
		+        c1&c2                |
		|----T----|----T----|----T----|
		*/
		bool c = false;
		if(__uintX2<T>::subT(C._l,A._l)) c = __uintX2<T>::subT(C._h,T(1));
		c |= __uintX2<T>::subT(C._h,A._h);
		if(c) --K;

		c=false;
		if(__uintX2<T>::subT(C._l,B._l)) c = __uintX2<T>::subT(C._h,T(1));
		c |= __uintX2<T>::subT(C._h,B._h);
		if(c) --K;

		if(K==T(1)) ++A._h;
		
		if(__uintX2<T>::addT(B._h,C._l))
			++A;
		A+=__uintX2<T>(C._h);
		/*
		|----T----|----T----|----T----|----T----|
		|_________A_________|_________B_________|
		|        |__________C__________|        |
		|        c                              |
		|----T----|----T----|----T----|----T----|
		*/
		return __uintX2<__uintX2<T> >(A,B);
#endif /* KARATSUBA */
	}


	static __uintX2<__uintX2<T> > sqr_uint(const __uintX2<T>& a)
	{
		//const usize_t BYTEPERME(sizeof(__uintX2<T>)*8);
		const usize_t BYTEPERME(BYTEPERT*2);

		__uintX2<T> A = __uintX2<T>::sqrT(a._h);
		__uintX2<T> B = __uintX2<T>::sqrT(a._l);
		__uintX2<T> C = __uintX2<T>::multT(a._h,a._l);

		bool c = false;
		if((C>>(BYTEPERME-1))==one()) ++A._h;
		C<<=1;
		
		if(__uintX2<T>::addT(B._h,C._l))
			++A;
		A+=__uintX2<T>(C._h);
		/*
		|----T----|----T----|----T----|----T----|
		|_________A_________|_________B_________|
		|        |__________C__________|        |
		|        c                              |
		|----T----|----T----|----T----|----T----|
		*/

		return __uintX2<__uintX2<T> > (A,B);
	}
	__uintX2<T> gcd(const __uintX2<T>& rhs) const
	{
		__uintX2<T> n(*this),m(rhs);
		if(n.is_zero())
			if(!m.is_zero()) return m;
			else throw "There is no gdc between two zeroes";
		if(m.is_zero())	return n;
		usize_t d(0);
		while ((m & one()).is_zero() && (n & one()).is_zero()) { m >>= 1; n >>= 1; ++d; }
		while ((m & one()).is_zero()) m >>= 1;
		while ((n & one()).is_zero()) n >>= 1;
		while (m!=n)
		if (m < n) 
		{
			n -= m;
			do  n >>= 1;  while ((n & one()).is_zero());
		} 
		else
		{
			m -= n;
			do  m >>= 1;  while ((m & one()).is_zero());
		} 
		return (m <<= d);
	}
	signed __int8 jacobi_symbol(const __uintX2<T>& rhs) const
	{
		if((rhs & one()).is_zero()) 
			throw "Denominator of jacobi symbol should be even number";
		if(gcd(rhs)!=one()) return 0;

		__uintX2<T> n(*this),m(rhs);
		signed __int8 r=1;
		while(true)
		{
			n%=m;
			signed __int8 k=0;
			while ((n & one()).is_zero()) 
			{
				n >>= 1; 
				k=1-k;
			}
			if(k==1)
				if((m%__uintX2<T>(T(8))==__uintX2<T>(T(3)))||
				   (m%__uintX2<T>(T(8))==__uintX2<T>(T(5)))) r=-r;
			if(n==one()) return r;
			if(n==m-one())
			{
				if(m%__uintX2<T>(T(4))==one()) return r;
				if(m%__uintX2<T>(T(4))==__uintX2<T>(T(3))) return -r;
			}

			if((((m>>1)*(n>>1))&one())==one()) 
			  r=-r;
			__uintX2<T> t=n;n=m;m=t;
		}
	}
	signed __int8 __uintX2<T>::cmp(const __uintX2<T>& rhs) const
	{
		if(_h>rhs._h) return 1;
		else if(_h<rhs._h) return -1;
		else if (_l>rhs._l) return 1;
		else if(_l<rhs._l) return -1;
		else return 0;
	}
	inline bool bit(usize_t i) const
	{
		__uintX2<T> un=one();
		un <<=i;
		un &=(*this);
		un>>=i;
		return un==one();
		//return (((un<<=i)&=(*this))>>=i)==one();
	}
	inline bool is_zero() const
	{
		return (_l==T(0)) && (_h==T(0));
	}
	inline static __uintX2<T> _max()
	{
		//__uintX2<T> res(0);
		//for(usize_t i(BYTEPERT*2);i>=1;--i)	res|=one()<<(i-1);
		return __uintX2<T>(T(0))-one();
	}
public:
	inline static __uintX2<T> one()
	{
		return __uintX2<T>(T(1));
	}
	inline static __uintX2<T> zero()
	{
		return __uintX2<T>(T(0));
	}
};

template <typename T> std::ostream& operator<<(std::ostream& stream, __uintX2<T> n)
{
	if(n._h==T(0)) stream<<n._l;
	else stream<<'('<<n._h<<';'<<n._l<<')';
	return stream;
}
typedef __uintX2<> uint128;

/** /
	//public methods tests
    typedef __uintX2<__uintX2<> >  test_uint;
    //__uintX2<__uintX2<> > aa(1,2),ab(3,4),bb(5,6),cc(7,8);
	//test_uint a(aa,bb),b(bb),c(cc);
	test_uint a(1,2),b(3),c(4);
	b>a;b>=a;b<a;b<=a;b==a;b!=a;
	b&a;b&=a;b|a;b|=a;b^a;b^=a;~a;
	b<<2;b<<=2;a>>2;a>>=2;
	b++;++a;b--;--a;
	b+a;b+=a;b-a;b-=a;b*a;b*=a;b/a;b/=a;b%a;b%=a;
	test_uint::add_m(a,b,c);test_uint::add_m(a,b);test_uint::sub_m(a,b,c);
	test_uint::mult_m(a,b,c);test_uint::sqr_m(a,c);test_uint::pow_m(a,b,c);test_uint::gcd(a,b);
	test_uint::random();test_uint::jacobi_symbol(a, (b<<=1)+test_uint(1));
	a.size(); a.weight(); a.inv(b);
    //end of test 
/**/
#endif /* uintX2 */


/* 
TODO:
1.check the size function (reduce to binary)
2.check the inv function (unpredictably results sometimes appear)

	uint128 p(13420512395579704375ULL, 15834288541366907109ULL),
	uint128 p(1342051235ULL, 15834288541366907109ULL),
    alpha(14);
	uint128 _a = alpha.inv(p);
	uint128 aa = uint128::mult_m(alpha, _a, p); 
	assert(aa == 1);
*/

// for converting text files into MacOS/X or Linux from Windows execute the following:
// tr -d '\r' < file1 > file2 
// this deletes all the symbols '\r' from file1 and puts changed content into file2